Dual CRISPR Screen Analysis

Step 2: Construct Filter

Amanda Birmingham, CCBB, UCSD (abirmingham@ucsd.edu)

Instructions

To run this notebook reproducibly, follow these steps:

  1. Click Kernel > Restart & Clear Output
  2. When prompted, click the red Restart & clear all outputs button
  3. Fill in the values for your analysis for each of the variables in the Input Parameters section
  4. Click Cell > Run All

Input Parameters


In [ ]:
# --- Directories (a leading '~' is allowed; both paths go through
# ns_files.expand_path in the Automated Set-Up section) ---
# Directory handed to the parallel filtering run as its input location.
g_trimmed_fastqs_dir = '~/dual_crispr/test_data/test_set_2'
# Directory handed to fltr.filter_pair_by_len as the last of its extra arguments.
g_filtered_fastqs_dir = '~/dual_crispr/test_outputs/test_set_2'

# --- Length settings, forwarded in this order to fltr.filter_pair_by_len ---
# NOTE(review): presumably the min/max bound the accepted trimmed gRNA length
# and the third value is how many bases are kept for matching -- confirm
# against dual_crispr.count_filterer.
g_min_trimmed_grna_len = 19
g_max_trimmed_grna_len = 21
g_len_of_seq_to_match = 19

# --- Parallelism: passed to ns_parallel.parallel_process_paired_reads ---
g_num_processors = 3

Automated Set-Up


In [ ]:
import inspect

import ccbb_pyutils.analysis_run_prefixes as ns_runs
import ccbb_pyutils.files_and_paths as ns_files
import ccbb_pyutils.notebook_logging as ns_logs


def describe_var_list(input_var_name_list):
    """Return a "name: value" report with one line per requested entry.

    Args:
        input_var_name_list: iterable of strings. Each string is passed to
            eval(), so it is evaluated as a Python expression in this
            function's scope/module namespace; in practice these are the
            names of notebook-level variables.

    Returns:
        A single string made of "<name>: <value>\n" for every entry, in the
        order given; the empty string when the list is empty.

    Raises:
        Whatever eval() raises for an entry (e.g. NameError for an undefined
        variable name).
    """
    # eval() is only appropriate here because callers pass hard-coded variable
    # names; never feed this function text from an untrusted source.
    return "".join(["{}: {}\n".format(name, eval(name))
                    for name in input_var_name_list])


# Configure logging once for the notebook session.
# NOTE(review): judging by the helper's name this presumably sends INFO-level
# log records to stdout so they appear in cell output -- confirm in
# ccbb_pyutils.notebook_logging.
ns_logs.set_stdout_info_logger()

In [ ]:
# Normalize the input directory given in the Input Parameters section.
g_trimmed_fastqs_dir = ns_files.expand_path(g_trimmed_fastqs_dir)

# Settle on the output directory, then normalize it the same way.
# NOTE(review): check_or_set receives the input directory as its second
# argument, presumably as the fallback when no output directory was given --
# confirm in ccbb_pyutils.analysis_run_prefixes.
g_filtered_fastqs_dir = ns_files.expand_path(
    ns_runs.check_or_set(
        g_filtered_fastqs_dir,
        g_trimmed_fastqs_dir,
    )
)

# Echo the resolved directories so they are captured in the notebook output.
print(describe_var_list([
    'g_trimmed_fastqs_dir',
    'g_filtered_fastqs_dir',
]))

# NOTE(review): presumably creates the output directory when it does not
# exist yet -- confirm in ccbb_pyutils.files_and_paths.
ns_files.verify_or_make_dir(g_filtered_fastqs_dir)

Construct Filtering Functions


In [ ]:
# Import the scaffold-trimming module (used below for the trimmed-file suffix)
# and print its full source text so the exact code in use is visible in the
# notebook output. `inspect` comes from the Automated Set-Up cell.
import dual_crispr.scaffold_trim as trim
print(inspect.getsource(trim))

In [ ]:
# Import the module that provides filter_pair_by_len (the per-pair filtering
# function used below) and print its full source text so the exact code in
# use is visible in the notebook output.
import dual_crispr.count_filterer as fltr
print(inspect.getsource(fltr))

In [ ]:
import ccbb_pyutils.parallel_process_fastqs as ns_parallel

# Run fltr.filter_pair_by_len over the paired reads found in the trimmed
# directory, using g_num_processors workers. The list in the final position
# carries the extra settings for the filter function.
# NOTE(review): presumably parallel_process_paired_reads selects files by the
# given suffix and passes the list items to the filter function ahead of each
# file pair -- confirm in ccbb_pyutils.parallel_process_fastqs.
g_parallel_results = ns_parallel.parallel_process_paired_reads(
    g_trimmed_fastqs_dir,
    trim.get_trimmed_suffix(trim.TrimType.FIVE_THREE),
    g_num_processors,
    fltr.filter_pair_by_len,
    [
        g_min_trimmed_grna_len,
        g_max_trimmed_grna_len,
        g_len_of_seq_to_match,
        g_filtered_fastqs_dir,
    ],
)

In [ ]:
# Print the outcome of the parallel filtering run.
# NOTE(review): concatenate_parallel_results presumably merges the per-worker
# results into one printable value -- confirm in
# ccbb_pyutils.parallel_process_fastqs.
print(ns_parallel.concatenate_parallel_results(g_parallel_results))

In [ ]:
# Final sanity check: print the result of looking for files with the trimmed
# suffix in g_trimmed_fastqs_dir.
# NOTE(review): the failure message refers to *filtered* files, but this call
# inspects the *trimmed* (input) directory using the *trimmed* suffix, i.e.
# the inputs of this step rather than its outputs. As written it looks unable
# to detect a filtering failure. It probably should check
# g_filtered_fastqs_dir with the suffix that fltr.filter_pair_by_len gives its
# output files -- confirm that suffix in dual_crispr.count_filterer before
# changing the call.
print(ns_files.check_file_presence(g_trimmed_fastqs_dir, "", trim.get_trimmed_suffix(trim.TrimType.FIVE_THREE),
                                  check_failure_msg="Construct filtering failed to produce filtered file(s)."))